Data

Which years are missing a metric

dat %>% 
  ggplot(aes(year, lakeid, fill=is.na(daynum))) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~metric, nrow=6) 

  # labs(fill = "Missing") + 
  # scale_fill_scico(palette = 'imola')

# dat %>% 
#   ggplot(aes(year, lakeid, fill=daynum)) +
#   geom_tile(color="grey") +
#   theme_bw() +
#   facet_wrap(~metric, nrow=6) + 
#   # labs(fill = "Missing") + 
#   scale_fill_viridis_c()

Remaining data issues:

  • Northern lakes don’t have DOC until until 1986; missing first 4 years when all other variables are available. Potential fixes:
    • Exclude DOC; start analyses in 1986; fill DOC with e.g., median value, or model of other variables?
  • Two missing iceoff dates in AL (1982) and CB (1988). Potential fix:
    • Fill with function/model of other lake ice on dates
  • Southern lakes missing chlorophyll data: 1996 - 1999 and 2002 - 2004; 2002-04 is bad/uncalibrated values, used DOY but not magnitude; earlier years fill with median
  • Zoop data issues:
    • CB missing daphnia peak 1995(?): fill with medain
    • FI missing several years from 1996 - 2006; fill with medain

Predict ice-on in northern lakes from other lakes ice-on dates

n_iceoff_wide = dat %>% 
  select(-sampledate)%>% 
  filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR") & metric == "iceoff") %>% 
  pivot_wider(names_from = lakeid, values_from = daynum)

al_iceoff_model = lm(AL~BM+CB+CR+SP+TB+TR, data=n_iceoff_wide)
summary(al_iceoff_model)
## 
## Call:
## lm(formula = AL ~ BM + CB + CR + SP + TB + TR, data = n_iceoff_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3721 -0.8451  0.1265  0.9507  2.7265 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -3.10967    3.27070  -0.951   0.3496  
## BM          -0.03295    0.18851  -0.175   0.8624  
## CB           0.24815    0.12648   1.962   0.0594 .
## CR           0.20333    0.17076   1.191   0.2434  
## SP           0.31430    0.20384   1.542   0.1339  
## TB           0.19500    0.10280   1.897   0.0678 .
## TR           0.09211    0.13278   0.694   0.4934  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.779 on 29 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9793, Adjusted R-squared:  0.975 
## F-statistic: 228.9 on 6 and 29 DF,  p-value: < 2.2e-16
plot(n_iceoff_wide$AL, predict(al_iceoff_model, n_iceoff_wide), pch=16)

# actually looks good; use to fill
cb_iceoff_model = lm(CB~AL+BM+CR+SP+TB+TR, data=n_iceoff_wide)
summary(cb_iceoff_model)
## 
## Call:
## lm(formula = CB ~ AL + BM + CR + SP + TB + TR, data = n_iceoff_wide)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3604 -0.8546  0.0970  1.5519  3.4296 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  3.02887    4.54691   0.666   0.5106  
## AL           0.47220    0.24068   1.962   0.0594 .
## BM           0.04037    0.26007   0.155   0.8777  
## CR           0.46368    0.22535   2.058   0.0487 *
## SP           0.04008    0.29239   0.137   0.8919  
## TB           0.08250    0.14956   0.552   0.5854  
## TR          -0.14222    0.18277  -0.778   0.4428  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.455 on 29 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.9584, Adjusted R-squared:  0.9498 
## F-statistic: 111.5 on 6 and 29 DF,  p-value: < 2.2e-16
plot(n_iceoff_wide$CB, predict(cb_iceoff_model, n_iceoff_wide), pch=16)

# really good; go ahead and fill

al_missing_iceoff = n_iceoff_wide %>% filter(is.na(AL))
al_missing_iceoff$iceoff_fill = round(predict(al_iceoff_model, al_missing_iceoff))
al_iceoff_fill = al_missing_iceoff %>% 
  select(metric, year, iceoff_fill) %>% 
  rename(daynum_fill = iceoff_fill) %>% 
  mutate(metric = "iceoff", lakeid = "AL") 

cb_missing_iceoff = n_iceoff_wide %>% filter(is.na(CB))
cb_missing_iceoff$iceoff_fill = round(predict(cb_iceoff_model, cb_missing_iceoff))
cb_iceoff_fill = cb_missing_iceoff %>% 
  select(metric, year, iceoff_fill) %>% 
  rename(daynum_fill = iceoff_fill) %>% 
  mutate(metric = "iceoff", lakeid = "CB")

iceoff_fill = bind_rows(al_iceoff_fill, cb_iceoff_fill)

Predict DOC daynum from other variable daynum’s in each northern lake

no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
n_lakes_wide = left_join(no_dups, dat) %>% 
  select(-sampledate) %>% 
  pivot_wider(names_from = metric, values_from = daynum) %>% 
  filter(lakeid %in% c("AL", "BM", "CB", "CR", "SP", "TB", "TR"))
## Joining, by = c("lakeid", "year", "metric")
hold_data = na.omit(data.frame(n_lakes_wide %>%  ungroup() %>% select(-year, -N))) # , -doc_predict
all = lm(doc_epiMax~(iceon+straton+stratoff+energy+
                 stability+anoxia_summer+
                 secchi_openwater_max+total_zoop_biomass+daphnia_biomass)*lakeid, data=hold_data)
i_o = lm(doc_epiMax~1, data=hold_data)
hold_step = step(i_o, scope=formula(all))
## Start:  AIC=1747.95
## doc_epiMax ~ 1
## 
##                        Df Sum of Sq    RSS    AIC
## + lakeid                6     74514 394308 1720.3
## + stratoff              1     15366 453457 1742.3
## + total_zoop_biomass    1     10343 458479 1744.8
## + daphnia_biomass       1      4684 464138 1747.7
## <none>                              468822 1748.0
## + secchi_openwater_max  1      1158 467664 1749.4
## + iceon                 1       663 468159 1749.6
## + straton               1       526 468297 1749.7
## + anoxia_summer         1       477 468345 1749.7
## + stability             1       213 468609 1749.8
## + energy                1        62 468760 1749.9
## 
## Step:  AIC=1720.32
## doc_epiMax ~ lakeid
## 
##                        Df Sum of Sq    RSS    AIC
## + stratoff              1      6463 387845 1718.5
## + daphnia_biomass       1      4607 389700 1719.6
## + total_zoop_biomass    1      3796 390512 1720.1
## + energy                1      3779 390529 1720.1
## <none>                              394308 1720.3
## + anoxia_summer         1      2246 392062 1721.0
## + iceon                 1      1797 392511 1721.3
## + straton               1       132 394176 1722.2
## + secchi_openwater_max  1        27 394281 1722.3
## + stability             1        14 394294 1722.3
## - lakeid                6     74514 468822 1748.0
## 
## Step:  AIC=1718.53
## doc_epiMax ~ lakeid + stratoff
## 
##                        Df Sum of Sq    RSS    AIC
## + energy                1      4580 383265 1717.8
## + daphnia_biomass       1      3963 383881 1718.2
## + total_zoop_biomass    1      3796 384049 1718.3
## <none>                              387845 1718.5
## + iceon                 1      2961 384884 1718.8
## + anoxia_summer         1      2474 385371 1719.1
## - stratoff              1      6463 394308 1720.3
## + straton               1       221 387623 1720.4
## + stability             1        86 387758 1720.5
## + secchi_openwater_max  1         7 387838 1720.5
## + stratoff:lakeid       6     11324 376521 1723.8
## - lakeid                6     65612 453457 1742.3
## 
## Step:  AIC=1717.81
## doc_epiMax ~ lakeid + stratoff + energy
## 
##                        Df Sum of Sq    RSS    AIC
## + daphnia_biomass       1      4970 378295 1716.8
## + total_zoop_biomass    1      4401 378864 1717.2
## + iceon                 1      4116 379149 1717.3
## <none>                              383265 1717.8
## + anoxia_summer         1      2431 380833 1718.3
## - energy                1      4580 387845 1718.5
## + straton               1      1240 382025 1719.1
## + stability             1       344 382921 1719.6
## + secchi_openwater_max  1        49 383215 1719.8
## - stratoff              1      7264 390529 1720.1
## + stratoff:lakeid       6     10575 372690 1723.4
## + energy:lakeid         6      1479 381786 1728.9
## - lakeid                6     69453 452718 1744.0
## 
## Step:  AIC=1716.82
## doc_epiMax ~ lakeid + stratoff + energy + daphnia_biomass
## 
##                          Df Sum of Sq    RSS    AIC
## + iceon                   1      3501 374794 1716.7
## <none>                                378295 1716.8
## + anoxia_summer           1      2232 376063 1717.5
## - daphnia_biomass         1      4970 383265 1717.8
## + total_zoop_biomass      1      1335 376960 1718.0
## + straton                 1      1071 377223 1718.2
## - energy                  1      5586 383881 1718.2
## + stability               1       174 378121 1718.7
## - stratoff                1      6597 384892 1718.8
## + secchi_openwater_max    1        58 378237 1718.8
## + stratoff:lakeid         6     11129 367165 1722.0
## + daphnia_biomass:lakeid  6      5445 372850 1725.5
## + energy:lakeid           6      2077 376218 1727.6
## - lakeid                  6     69095 447390 1743.2
## 
## Step:  AIC=1716.69
## doc_epiMax ~ lakeid + stratoff + energy + daphnia_biomass + iceon
## 
##                          Df Sum of Sq    RSS    AIC
## <none>                                374794 1716.7
## - iceon                   1      3501 378295 1716.8
## - daphnia_biomass         1      4355 379149 1717.3
## + anoxia_summer           1      1404 373390 1717.8
## + total_zoop_biomass      1      1334 373460 1717.9
## + straton                 1       920 373873 1718.1
## + stability               1       205 374589 1718.6
## + secchi_openwater_max    1        45 374748 1718.7
## - energy                  1      6670 381463 1718.7
## - stratoff                1      8015 382808 1719.5
## + stratoff:lakeid         6     10348 364446 1722.3
## + iceon:lakeid            6      8385 366409 1723.5
## + daphnia_biomass:lakeid  6      4546 370248 1725.9
## + energy:lakeid           6      2281 372512 1727.3
## - lakeid                  6     68233 443026 1743.0
doc_model = lm(doc_epiMax~(iceon+straton+stratoff+energy+
                 stability+anoxia_summer+
                 secchi_openwater_max+total_zoop_biomass+daphnia_biomass)*lakeid, 
               data=n_lakes_wide, 
               na.action = na.exclude)
summary(doc_model)
## 
## Call:
## lm(formula = doc_epiMax ~ (iceon + straton + stratoff + energy + 
##     stability + anoxia_summer + secchi_openwater_max + total_zoop_biomass + 
##     daphnia_biomass) * lakeid, data = n_lakes_wide, na.action = na.exclude)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -141.770  -17.901   -0.181   18.898  108.897 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                    3.278e+02  1.526e+02   2.148  0.03314 * 
## iceon                         -5.510e-01  3.258e-01  -1.691  0.09267 . 
## straton                        2.033e-02  3.200e-01   0.064  0.94941   
## stratoff                       6.192e-01  3.397e-01   1.823  0.07017 . 
## energy                        -4.519e-01  2.277e-01  -1.985  0.04882 * 
## stability                     -1.468e-02  1.493e-01  -0.098  0.92178   
## anoxia_summer                 -5.953e-02  1.682e-01  -0.354  0.72392   
## secchi_openwater_max          -8.503e-02  8.359e-02  -1.017  0.31053   
## total_zoop_biomass             3.360e-02  6.958e-02   0.483  0.62981   
## daphnia_biomass                1.050e-01  7.661e-02   1.370  0.17241   
## lakeid.L                      -6.303e+02  4.229e+02  -1.490  0.13803   
## lakeid.Q                       5.458e+01  4.027e+02   0.136  0.89235   
## lakeid.C                       4.539e+02  4.127e+02   1.100  0.27304   
## lakeid^4                      -1.095e+02  4.081e+02  -0.268  0.78880   
## lakeid^5                      -8.383e+01  4.023e+02  -0.208  0.83517   
## lakeid^6                      -2.015e+02  3.718e+02  -0.542  0.58865   
## iceon:lakeid.L                 8.564e-01  8.399e-01   1.020  0.30942   
## iceon:lakeid.Q                 6.180e-01  8.622e-01   0.717  0.47456   
## iceon:lakeid.C                -5.787e-01  8.657e-01  -0.668  0.50477   
## iceon:lakeid^4                 5.088e-01  8.240e-01   0.617  0.53776   
## iceon:lakeid^5                 1.452e-01  8.907e-01   0.163  0.87071   
## iceon:lakeid^6                 1.563e+00  8.875e-01   1.761  0.08007 . 
## straton:lakeid.L              -1.351e-01  8.442e-01  -0.160  0.87309   
## straton:lakeid.Q              -6.182e-02  8.174e-01  -0.076  0.93980   
## straton:lakeid.C              -3.454e-02  8.578e-01  -0.040  0.96793   
## straton:lakeid^4               1.203e+00  8.626e-01   1.395  0.16496   
## straton:lakeid^5               1.096e+00  8.712e-01   1.258  0.21021   
## straton:lakeid^6               4.844e-01  8.248e-01   0.587  0.55782   
## stratoff:lakeid.L              5.681e-01  9.167e-01   0.620  0.53634   
## stratoff:lakeid.Q             -4.552e-01  8.073e-01  -0.564  0.57368   
## stratoff:lakeid.C             -2.335e-01  9.276e-01  -0.252  0.80157   
## stratoff:lakeid^4             -7.509e-01  9.877e-01  -0.760  0.44820   
## stratoff:lakeid^5              2.994e-01  9.383e-01   0.319  0.75005   
## stratoff:lakeid^6             -1.898e+00  7.990e-01  -2.376  0.01865 * 
## energy:lakeid.L                4.598e-02  6.108e-01   0.075  0.94008   
## energy:lakeid.Q               -2.602e-01  5.772e-01  -0.451  0.65272   
## energy:lakeid.C               -1.765e-02  6.038e-01  -0.029  0.97671   
## energy:lakeid^4                1.434e-01  6.484e-01   0.221  0.82521   
## energy:lakeid^5               -3.331e-01  5.968e-01  -0.558  0.57749   
## energy:lakeid^6                1.740e-01  5.746e-01   0.303  0.76238   
## stability:lakeid.L             1.712e-01  3.825e-01   0.448  0.65505   
## stability:lakeid.Q             1.750e-01  4.023e-01   0.435  0.66416   
## stability:lakeid.C            -2.472e-01  4.025e-01  -0.614  0.53997   
## stability:lakeid^4            -3.813e-01  3.455e-01  -1.103  0.27146   
## stability:lakeid^5             1.388e-01  4.214e-01   0.329  0.74238   
## stability:lakeid^6             3.208e-02  4.108e-01   0.078  0.93786   
## anoxia_summer:lakeid.L         1.323e-02  5.044e-01   0.026  0.97911   
## anoxia_summer:lakeid.Q         8.017e-02  4.902e-01   0.164  0.87029   
## anoxia_summer:lakeid.C        -1.146e-01  4.358e-01  -0.263  0.79294   
## anoxia_summer:lakeid^4        -7.539e-03  4.701e-01  -0.016  0.98723   
## anoxia_summer:lakeid^5        -5.039e-01  3.541e-01  -1.423  0.15655   
## anoxia_summer:lakeid^6         3.292e-01  3.973e-01   0.829  0.40852   
## secchi_openwater_max:lakeid.L  6.604e-01  2.400e-01   2.752  0.00658 **
## secchi_openwater_max:lakeid.Q -4.356e-01  2.357e-01  -1.848  0.06642 . 
## secchi_openwater_max:lakeid.C -3.006e-01  2.287e-01  -1.315  0.19046   
## secchi_openwater_max:lakeid^4  1.969e-01  2.017e-01   0.976  0.33036   
## secchi_openwater_max:lakeid^5 -3.331e-01  2.168e-01  -1.536  0.12644   
## secchi_openwater_max:lakeid^6  1.143e-01  2.007e-01   0.569  0.56986   
## total_zoop_biomass:lakeid.L    1.146e-01  1.798e-01   0.638  0.52462   
## total_zoop_biomass:lakeid.Q   -2.267e-01  1.896e-01  -1.196  0.23347   
## total_zoop_biomass:lakeid.C   -3.778e-01  1.868e-01  -2.023  0.04470 * 
## total_zoop_biomass:lakeid^4   -2.103e-02  1.615e-01  -0.130  0.89651   
## total_zoop_biomass:lakeid^5   -1.744e-01  1.935e-01  -0.901  0.36874   
## total_zoop_biomass:lakeid^6    1.050e-01  1.914e-01   0.548  0.58412   
## daphnia_biomass:lakeid.L      -1.785e-02  2.143e-01  -0.083  0.93373   
## daphnia_biomass:lakeid.Q       7.194e-02  2.108e-01   0.341  0.73335   
## daphnia_biomass:lakeid.C       2.495e-01  2.037e-01   1.225  0.22246   
## daphnia_biomass:lakeid^4       2.395e-02  2.018e-01   0.119  0.90569   
## daphnia_biomass:lakeid^5       2.480e-01  1.925e-01   1.288  0.19948   
## daphnia_biomass:lakeid^6      -1.686e-01  1.919e-01  -0.879  0.38079   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 41.73 on 165 degrees of freedom
##   (38 observations deleted due to missingness)
## Multiple R-squared:  0.389,  Adjusted R-squared:  0.1334 
## F-statistic: 1.522 on 69 and 165 DF,  p-value: 0.01581
n_lakes_wide$doc_predict = predict(doc_model, n_lakes_wide)

cor = n_lakes_wide %>% 
  group_by(lakeid) %>% 
  summarise(r = paste("r =", round(cor(doc_epiMax, doc_predict, use="complete.obs"), 3)))

n_lakes_wide %>% 
  ggplot(aes(x=doc_epiMax, y=doc_predict, color=as.character(lakeid))) +
  geom_point()+
  theme_bw() +
  labs(color="lakeid")+
  geom_abline(slope=1, intercept = 0) + 
  facet_wrap(~lakeid) + 
  geom_text(aes(label=r), data=cor, x=300, y=0) +
  labs(x="obs peak DOC (daynum)",  y="modeled peak DOC (daynum)")
## Warning: Removed 38 rows containing missing values (`geom_point()`).

# not good but vaguely positive relationship; use this instead of median?
missing_doc = n_lakes_wide %>% filter(is.na(doc_epiMax))

missing_doc$doc_fill = round(predict(doc_model, missing_doc))

doc_fill = missing_doc %>% 
  select(lakeid, year, doc_fill) %>% 
  rename(daynum_fill = doc_fill) %>% 
  mutate(metric = "doc_epiMax") %>% 
  filter(year < 2000)

Try to fill the missing chlorophyll values in the southern lakes

**Using dates from bad/uncalibrated chl data

filled_chl_dates = read_csv("Data/derived/chl_filled_peaks.csv") %>% 
  rename(daynum_fill = daynum) %>% 
  select(-sampledate)
## Rows: 36 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): lakeid, metric
## dbl  (2): year, daynum
## date (1): sampledate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Create filled metric column and filled

all_fill = bind_rows(iceoff_fill, doc_fill, filled_chl_dates)
dat_filled = full_join(dat, all_fill) %>% 
  mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, FALSE)) %>% 
  mutate(daynum_fill = ifelse(is.na(daynum) & !is.na(daynum_fill), daynum_fill, daynum))
## Joining, by = c("lakeid", "year", "metric")
dat_filled$lakeid = factor(dat_filled$lakeid, 
                    levels = c("AL", "BM", "CB", "CR", "SP", "TB", "TR", "FI", "ME", "MO", "WI"), 
                    ordered = T)
vars = c("iceoff", "straton", "secchi_all", "secchi_openwater_max","secchi_openwater_min", "daphnia_biomass","zoopDensity",
        "total_zoop_biomass", "chlor_all", "chlor_spring", "chlor_fall", "doc_epiMax",
        "totpuf_epiMax", "totpuf_epiMin", "totpuf_hypoMax", "totpuf_hypoMin", 
        "anoxia", "anoxia_summer", "stability", "energy", "stratoff", "iceon")
dat_filled$metric = factor(dat_filled$metric,
                    levels = rev(vars),
                    ordered=T)

dat_filled %>% 
  ggplot(aes(year, metric, fill=is.na(daynum_fill))) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~lakeid, nrow=6) # + 

  # labs(fill = "Missing") + 
  # scale_fill_viridis_c() 

Additional trimming/filling

Trim to “good” years, fill FI ice data with Monona, then fill additional missing with median values for each lake/metric

df_yearsWant = dat_filled %>% 
  filter((lakeid %in% c("FI", "ME", "MO", "WI") & year %in% 1996:2018) |
           (!(lakeid %in% c("FI", "ME", "MO", "WI")) & year %in% 1982:2018))

fi_icefill = df_yearsWant %>% 
  filter(lakeid == "MO" & metric %in% c("iceoff", "iceon")) %>% 
  mutate(lakeid = "FI") %>% 
  select(-daynum, -filled_flag, -sampledate) %>% 
  rename(icefill = daynum_fill)

df_yearsWant = df_yearsWant %>% 
  full_join(fi_icefill) %>% 
  mutate(daynum_fill = ifelse(is.na(daynum_fill) & !is.na(icefill), 
                              icefill, daynum),
         filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill) & !is.na(icefill), 
                              TRUE, filled_flag))
## Joining, by = c("lakeid", "year", "metric")
df_yearsWant %>% 
  ggplot(aes(year, metric, fill=filled_flag)) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~lakeid, nrow=6)

Final fill w/ medians

all_lys = df_yearsWant %>% 
  select(lakeid, year) %>% 
  distinct()

metric = unique(df_yearsWant$metric)

all_lys_want = expand_grid(all_lys, metric)

dat_final = left_join(all_lys_want, df_yearsWant)
## Joining, by = c("lakeid", "year", "metric")
medians = dat_final %>% 
  group_by(lakeid, metric) %>% 
  summarise(median_daynum = median(daynum_fill, na.rm=T))
## `summarise()` has grouped output by 'lakeid'. You can override using the
## `.groups` argument.
dat_final = dat_final %>% 
  left_join(medians) %>% 
  mutate(daynum_fill = ifelse(is.na(daynum_fill), median_daynum, daynum_fill)) %>% 
  mutate(filled_flag = ifelse(is.na(daynum) & !is.na(daynum_fill), TRUE, filled_flag)) %>% 
  select(-icefill, -median_daynum)
## Joining, by = c("lakeid", "metric")
dat_final %>%  
  ggplot(aes(year, metric, fill=daynum_fill)) +
  geom_tile(color="grey") +
  theme_bw() +
  facet_wrap(~lakeid, nrow=6)

write_csv(dat_final, "Data/analysis_ready/final_combined_dates_filled_v2.csv")

========================================================================================= ## Old code: no longer used

chlor_all

no_dups = dat %>% group_by(lakeid, year, metric) %>% summarise(N = n()) %>% filter(N == 1)
## `summarise()` has grouped output by 'lakeid', 'year'. You can override using
## the `.groups` argument.
s_lakes_wide = left_join(no_dups, dat) %>% 
  select(-sampledate) %>% 
  pivot_wider(names_from = metric, values_from = daynum) %>% 
  filter(lakeid %in% c("FI", "ME", "MO", "WI"))
## Joining, by = c("lakeid", "year", "metric")
# stepwise
hold_data = na.omit(data.frame(s_lakes_wide %>% filter(lakeid != "FI") %>% select(-chlor_spring, -chlor_fall, -daphnia_biomass, -total_zoop_biomass)))
all = lm(chlor_all~., data=hold_data)
i_o = lm(chlor_all~1, data=hold_data)
hold = step(i_o, scope=formula(all))
## Start:  AIC=336.42
## chlor_all ~ 1
## 
##                        Df Sum of Sq    RSS    AIC
## + lakeid                1   23765.6 147231 332.44
## + secchi_openwater_min  1   12222.0 158774 335.45
## + totpuf_epiMin         1   10680.2 160316 335.84
## + stratoff              1    9424.6 161572 336.15
## <none>                              170996 336.42
## + secchi_openwater_max  1    6617.2 164379 336.84
## + straton               1    3156.5 167840 337.68
## + zoopDensity           1    2542.1 168454 337.82
## + anoxia_summer         1    1765.7 169231 338.01
## + totpuf_hypoMax        1     992.6 170004 338.19
## + doc_epiMax            1     588.4 170408 338.28
## + totpuf_epiMax         1     340.8 170656 338.34
## + totpuf_hypoMin        1     111.7 170885 338.39
## + year                  1      81.3 170915 338.40
## + iceon                 1      75.2 170921 338.40
## + energy                1      23.2 170973 338.42
## + stability             1      21.0 170975 338.42
## + iceoff                1       0.7 170996 338.42
## 
## Step:  AIC=332.44
## chlor_all ~ lakeid
## 
##                        Df Sum of Sq    RSS    AIC
## + stratoff              1    9424.6 137806 331.79
## + straton               1    7685.7 139545 332.29
## <none>                              147231 332.44
## + secchi_openwater_min  1    5905.7 141325 332.80
## + totpuf_epiMin         1    5006.7 142224 333.05
## + zoopDensity           1    4245.0 142986 333.26
## + totpuf_epiMax         1    3735.8 143495 333.41
## + anoxia_summer         1    3720.5 143510 333.41
## + iceon                 1    2087.2 145144 333.86
## + totpuf_hypoMax        1    1502.5 145728 334.02
## + secchi_openwater_max  1    1484.8 145746 334.03
## + energy                1     572.3 146658 334.28
## + iceoff                1     380.4 146850 334.33
## + stability             1     253.0 146978 334.37
## + totpuf_hypoMin        1     232.8 146998 334.37
## + doc_epiMax            1     100.6 147130 334.41
## + year                  1      81.3 147149 334.41
## - lakeid                1   23765.6 170996 336.42
## 
## Step:  AIC=331.79
## chlor_all ~ lakeid + stratoff
## 
##                        Df Sum of Sq    RSS    AIC
## <none>                              137806 331.79
## + straton               1    5781.8 132024 332.07
## - stratoff              1    9424.6 147231 332.44
## + anoxia_summer         1    3362.7 134443 332.80
## + totpuf_epiMin         1    2578.2 135228 333.03
## + stability             1    2242.3 135564 333.13
## + zoopDensity           1    2237.4 135569 333.13
## + secchi_openwater_min  1    2168.7 135637 333.15
## + secchi_openwater_max  1    1590.3 136216 333.32
## + totpuf_hypoMax        1    1498.8 136307 333.35
## + iceon                 1     732.2 137074 333.58
## + energy                1     698.6 137108 333.59
## + doc_epiMax            1     352.8 137453 333.69
## + iceoff                1     320.6 137486 333.70
## + year                  1     109.6 137697 333.76
## + totpuf_epiMax         1      91.9 137714 333.76
## + totpuf_hypoMin        1       0.3 137806 333.79
## - lakeid                1   23765.6 161572 336.15
chlAll_model_MEMOWI = lm(chlor_all~anoxia_summer + lakeid + iceon + stability + straton + stratoff, 
               data=s_lakes_wide %>% filter(lakeid != "FI"))
summary(chlAll_model_MEMOWI)
## 
## Call:
## lm(formula = chlor_all ~ anoxia_summer + lakeid + iceon + stability + 
##     straton + stratoff, data = s_lakes_wide %>% filter(lakeid != 
##     "FI"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -115.40  -44.11    0.33   41.41  122.46 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   -149.4119   271.2274  -0.551   0.5841  
## anoxia_summer    0.6454     0.3620   1.783   0.0806 .
## lakeid.L       -32.6015    26.4849  -1.231   0.2240  
## lakeid.Q       -32.9677    19.6738  -1.676   0.0999 .
## iceon            0.9296     0.6700   1.388   0.1713  
## stability       -0.5412     0.2784  -1.944   0.0574 .
## straton          0.6513     0.3881   1.678   0.0995 .
## stratoff        -0.4440     0.5099  -0.871   0.3880  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 63.02 on 51 degrees of freedom
##   (16 observations deleted due to missingness)
## Multiple R-squared:  0.2751, Adjusted R-squared:  0.1756 
## F-statistic: 2.765 on 7 and 51 DF,  p-value: 0.01627
plot(s_lakes_wide %>% filter(lakeid != "FI") %>% pull(chlor_all), 
     predict(chlAll_model_MEMOWI, s_lakes_wide %>% filter(lakeid != "FI")),
     col=s_lakes_wide$lakeid, pch=16, xlab="observed", ylab="predicted")

# using to fill for now
missing_chlAll_MEMOWI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid != "FI")
missing_chlAll_MEMOWI$chl_all_fill = round(predict(chlAll_model_MEMOWI, missing_chlAll_MEMOWI))

chlAll_model_FI = lm(chlor_all~(stratoff+energy+
                 stability+anoxia_summer+
                 secchi_openwater_max), 
               data=s_lakes_wide %>% filter(lakeid == "FI"))
summary(chlAll_model_FI)
## 
## Call:
## lm(formula = chlor_all ~ (stratoff + energy + stability + anoxia_summer + 
##     secchi_openwater_max), data = s_lakes_wide %>% filter(lakeid == 
##     "FI"))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -85.14 -30.10   1.29  39.92 108.12 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          874.1551   485.7957   1.799   0.0952 .
## stratoff               0.4276     1.1790   0.363   0.7227  
## energy                -3.0495     1.3009  -2.344   0.0356 *
## stability             -0.2118     0.7354  -0.288   0.7779  
## anoxia_summer         -0.2712     0.6247  -0.434   0.6713  
## secchi_openwater_max  -0.2282     0.2481  -0.920   0.3745  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 61.41 on 13 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.3268, Adjusted R-squared:  0.06793 
## F-statistic: 1.262 on 5 and 13 DF,  p-value: 0.337
plot(s_lakes_wide %>% filter(lakeid == "FI") %>% pull(chlor_all), 
     predict(chlAll_model_FI, s_lakes_wide %>% filter(lakeid == "FI")),
     col=s_lakes_wide$lakeid, pch=16)

missing_chlAll_FI = s_lakes_wide %>% filter(is.na(chlor_all) & lakeid == "FI")
missing_chlAll_FI$chl_all_fill = round(predict(chlAll_model_FI, missing_chlAll_FI))
# gives negative value; DONT fill FI

chlAll_fill = missing_chlAll_MEMOWI %>% 
  select(lakeid, year, chl_all_fill) %>% 
  rename(daynum_fill = chl_all_fill) %>% 
  mutate(metric = "chlor_all")